
library(digest)


#### This is the code I used to anonymize my results for the replication package.
#### Note that the hashed details can still be looked up on Wikipedia using the revid, but by hashing them
#### we require future users to use Wikipedia as the source, which could be altered if future conditions
#### require it; further, we make these individuals no more visible than they are on Wikipedia itself.
#### All hashes are made using SHA-256 to minimize collisions while maintaining comparability for open science purposes.
#e.g.
#test <- c(1,2,3)
#sapply(test, digest, algo='sha256')

## Editor-identifying columns, keyed by the name of the data frame that holds
## them. Every column listed here is replaced by the SHA-256 digest of each of
## its values; all other columns and objects are left as loaded.
id_columns <- list(
  userDF = "editor",
  revDF.clean = c(
    "editor", "editor_id_or_ip", "editorPlain", "editorUserpage",
    "userpage_title", "editor_id"
  )
)

## Hash each element of `values` separately and return a plain character
## vector of the same length.
##
## digest() is called with the same arguments as before (only `algo` set), so
## the hashes are identical to those produced by the earlier sapply() calls.
## vapply() is used instead of sapply() so that:
##   * the result is always character, even for zero-length input;
##   * USE.NAMES = FALSE keeps the original (un-hashed) strings from being
##     attached to the result as names.
## Missing values are hashed like any other value.
## NOTE(review): digest() serialises its input by default, so these are hashes
## of the serialised R value, not of the raw text -- confirm that is intended
## before comparing against hashes produced by other tools.
hash_values <- function(values) {
  vapply(values, digest, character(1), algo = "sha256", USE.NAMES = FALSE)
}

## Load the workspace stored at `in_path`, hash the columns named in `columns`
## and write every loaded object to `out_path` (serialization format 2).
##
## in_path   path of the .RData file to anonymize
## out_path  path of the .RData file to write
## columns   named list: data frame name -> character vector of column names
##
## Stops with an error when a listed data frame or column is absent, so an
## identifying column can never be skipped silently. Returns `out_path`
## invisibly.
##
## The file is loaded into a private environment and only that environment is
## saved. Unlike save.image(), this cannot pick up unrelated objects from the
## calling session, and the caller's workspace no longer needs to be wiped
## between datasets.
## NOTE(review): objects in the input file other than the columns listed in
## `columns` are written out unchanged -- verify none of them carry
## identifying values.
anonymize_dataset <- function(in_path, out_path, columns = id_columns) {
  workspace <- new.env(parent = emptyenv())
  load(in_path, envir = workspace)

  for (df_name in names(columns)) {
    if (!exists(df_name, envir = workspace, inherits = FALSE)) {
      stop("object '", df_name, "' not found in ", in_path, call. = FALSE)
    }
    df <- get(df_name, envir = workspace, inherits = FALSE)

    absent <- setdiff(columns[[df_name]], names(df))
    if (length(absent) > 0) {
      stop(
        "columns missing from '", df_name, "' in ", in_path, ": ",
        paste(absent, collapse = ", "),
        call. = FALSE
      )
    }

    ## `[[` matches column names exactly (no partial matching as with `$`).
    for (col in columns[[df_name]]) {
      df[[col]] <- hash_values(df[[col]])
    }
    assign(df_name, df, envir = workspace)
  }

  save(
    list = ls(workspace, all.names = TRUE),
    envir = workspace,
    file = out_path,
    version = 2
  )
  invisible(out_path)
}

anonymize_dataset(
  "../processed_data/dataset1.RData",
  "../processed_data/dataset1_anonymized.RData"
)
anonymize_dataset(
  "../processed_data/dataset2.RData",
  "../processed_data/dataset2_anonymized.RData"
)

